'''
Created on 2017-09-12

@author: zhangyanfeng
'''
from pyspark import SparkContext

sc = SparkContext('local', 'pyspark')

# union (merge) operation, combined with filtering
inputRDD = sc.parallelize(["zhangsan", "zhaoliu", "lisi"])
filteredRDD = inputRDD.filter(lambda x: "zhaoliu" in x)

inputRDD2 = sc.parallelize(["zhangsan", "lisi"])
filteredRDD2 = inputRDD2.filter(lambda x: "lisi" in x)
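# Note: filter() is a lazy transformation; no computation happens here.
# Spark only evaluates the lineage when an action (e.g. collect() below)
# is called.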

# merge the two filtered RDDs
unionRDD = filteredRDD.union(filteredRDD2)
print(unionRDD.collect())
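# A minimal follow-up sketch (not in the original script): unlike SQL's
# UNION, RDD.union() keeps duplicates, so distinct() is needed for a
# set-style merge. Element order in the output is not guaranteed.
dupRDD = inputRDD.union(inputRDD2)  # "zhangsan" and "lisi" each appear twice
dedupRDD = dupRDD.distinct()        # deduplicated merge
print(dupRDD.collect())
print(dedupRDD.collect())

sc.stop()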